"""Demonstrate de-duplicating an RDD with Spark's ``distinct`` transformation."""
from pyspark import SparkConf, SparkContext
import os

# Point PySpark worker processes at the local Windows interpreter.
os.environ["PYSPARK_PYTHON"] = "D:/Python3.10.7/python.exe"

# Run Spark locally, using all available cores.
spark_conf = SparkConf().setMaster("local[*]").setAppName("My App")
spark_ctx = SparkContext(conf=spark_conf)

# distinct example: collapse duplicate elements, then bring results to the driver.
sample_values = [1, 2, 3, 4, 5, 5, 4, 5, 6, 7, 8, 9, 10]
unique_values = spark_ctx.parallelize(sample_values).distinct().collect()
print(unique_values)  # [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

spark_ctx.stop()